Colegio Universitario de Cartago

Carrera de Big Data

BD-132 Programación I

Proyecto #2

Realizado por: Mariel Rodriguez, Johel Barquero, Pablo Marín y Wedell Orozco

Docente: Ericka Celina Valverde Navarro

Fecha de presentación de la investigación:

Librerías

library(readr)#importación de datos
library(dplyr)#manipulación de datos
library(DT)#tablas interactivas
library(ggplot2)#creación de gráficos
library(tidyr)#Limpieza de datos
library(fmsb)#gráficos de radar etc
library(gridExtra)#Combinar los gráficos

Set de Datos

# Load the raw FIFA player exports, one CSV per game edition. The first row of
# each file is read as the column header.
origi_data21 <- read_csv(file = "players_21.csv", col_names = TRUE) # FIFA 21
origi_data22 <- read_csv(file = "players_22.csv", col_names = TRUE) # FIFA 22
origi_data15 <- read_csv(file = "players_15.csv", col_names = TRUE) # FIFA 15

Selección de columnas necesarias:

# Columns the analysis needs; the 2021 and 2022 tables get the same subset,
# in the same order.
columnas_necesarias <- c(
  "short_name", "player_positions", "overall", "potential", "value_eur",
  "age", "club_name", "league_name", "nationality_name", "preferred_foot",
  "pace", "shooting", "passing", "dribbling", "defending", "physic"
)

fdata_21 <- origi_data21[, columnas_necesarias]

fdata_22 <- origi_data22[, columnas_necesarias]

Conocimiento de los datos

# Column names of the raw 2021 data
names(origi_data21)
# Structural overview of the raw 2021 data (base R view, then dplyr view)
str(origi_data21)
glimpse(origi_data21)

# Missing values in the 2021 subset
any(is.na(fdata_21)) # is there at least one NA?
sum(is.na(fdata_21)) # total number of NA cells
# NA count for each column
vapply(fdata_21, function(columna) sum(is.na(columna)), integer(1))

# Missing values in the 2022 subset
any(is.na(fdata_22)) # is there at least one NA?
sum(is.na(fdata_22)) # total number of NA cells
# NA count for each column
vapply(fdata_22, function(columna) sum(is.na(columna)), integer(1))

Datos y dimensiones:

Tablas de los sets de datos utilizados (años 2021-2022)

# Display the 2021 subset, then its dimensions (rows, columns).
# The `##` lines in this chunk appear to be recorded console output.
fdata_21
dim(fdata_21)
## [1] 18944    16
# Display the 2022 subset, then its dimensions (rows, columns).
fdata_22
dim(fdata_22)
## [1] 19239    16

Hipótesis #1

¿Cuál es la mayor conformación de jugadores “Elite” en las 5 grandes ligas?

Datos del año 2021

# Classify every 2021 player by overall rating: "Elite" (>= 80), "Bueno"
# (>= 60) or "Promedio" (anything lower). This is done in one vectorised step
# instead of a row-by-row loop, so it is fast, works on an empty table, and a
# missing rating yields NA instead of aborting the run.
fdata_21$clasificacion <- case_when(
  fdata_21$overall >= 80 ~ "Elite",
  fdata_21$overall >= 60 ~ "Bueno",
  !is.na(fdata_21$overall) ~ "Promedio"
)

# Elite players that belong to one of the five major leagues
ligas_top5 <- c(
  "Spain Primera Division", "English Premier League", "Italian Serie A",
  "German 1. Bundesliga", "French Ligue 1"
)
five_league21 <- fdata_21 %>%
  filter(league_name %in% ligas_top5, clasificacion == "Elite")

# Display names shown on the x axis for each league
etiquetas_ligas <- c(
  "Spain Primera Division" = "La Liga (España)",
  "English Premier League" = "Premier League (Inglaterra)",
  "Italian Serie A" = "Serie A (Italia)",
  "German 1. Bundesliga" = "Bundesliga (Alemania)",
  "French Ligue 1" = "Ligue 1 (Francia)"
)

# Bar chart: number of Elite players per league in 2021
ggplot(five_league21, aes(x = league_name)) +
  geom_bar(fill = "#1f77b4", color = "black") +
  labs(
    title = "Distribución de jugadores Elite por liga año 2021",
    x = "Ligas",
    y = "Número de Jugadores"
  ) +
  scale_x_discrete(labels = etiquetas_ligas) +
  theme(panel.background = element_rect(fill = "#C1CDC1"))

Datos del año 2022

# Classify every 2022 player by overall rating: "Elite" (>= 80), "Bueno"
# (>= 60) or "Promedio" (anything lower). This is done in one vectorised step
# instead of a row-by-row loop, so it is fast, works on an empty table, and a
# missing rating yields NA instead of aborting the run.
fdata_22$clasificacion <- case_when(
  fdata_22$overall >= 80 ~ "Elite",
  fdata_22$overall >= 60 ~ "Bueno",
  !is.na(fdata_22$overall) ~ "Promedio"
)

# Elite players that belong to one of the five major leagues
ligas_top5 <- c(
  "Spain Primera Division", "English Premier League", "Italian Serie A",
  "German 1. Bundesliga", "French Ligue 1"
)
five_league22 <- fdata_22 %>%
  filter(league_name %in% ligas_top5, clasificacion == "Elite")

# Display names shown on the x axis for each league
etiquetas_ligas <- c(
  "Spain Primera Division" = "La Liga (España)",
  "English Premier League" = "Premier League (Inglaterra)",
  "Italian Serie A" = "Serie A (Italia)",
  "German 1. Bundesliga" = "Bundesliga (Alemania)",
  "French Ligue 1" = "Ligue 1 (Francia)"
)

# Bar chart: number of Elite players per league in 2022
ggplot(five_league22, aes(x = league_name)) +
  geom_bar(fill = "#ff7f0e", color = "black") +
  labs(
    title = "Distribución de jugadores Elite por liga año 2022",
    x = "Ligas",
    y = "Número de Jugadores"
  ) +
  scale_x_discrete(labels = etiquetas_ligas) +
  theme(panel.background = element_rect(fill = "#C1CDC1"))

Comparación final

# Tag each season's Elite subset with its year and stack both into one table
five_league21$year <- 2021
five_league22$year <- 2022

union_data <- rbind(five_league21, five_league22)

# Dodged bar chart: Elite players per league, one bar per season.
# geom_bar() counts the rows itself, so no explicit y mapping is needed (the
# former `..count..` notation is deprecated in ggplot2). Only one fill scale is
# declared: the earlier scale_fill_discrete() was immediately replaced by
# scale_fill_manual() and only produced a "scale already present" message.
ggplot(union_data, aes(x = league_name, fill = as.factor(year))) +
  geom_bar(position = "dodge") +
  labs(
    title = "Comparativa de Jugadores Elite por Liga (2021 vs 2022)",
    x = "Ligas",
    y = "Número de Jugadores"
  ) +
  scale_x_discrete(labels = c(
    "Spain Primera Division" = "La Liga(España)",
    "English Premier League" = "Premier League(Inglaterra)",
    "Italian Serie A" = "Serie A(Italia)",
    "German 1. Bundesliga" = "Bundesliga(Alemania)",
    "French Ligue 1" = "Ligue 1(Francia)"
  )) +
  scale_fill_manual(
    values = c("2021" = "#1f77b4", "2022" = "#ff7f0e"),
    name = "Año"
  ) +
  theme(panel.background = element_rect(fill = "#C1CDC1"))

Hipótesis #3

Conforme al potencial actual del jugador, ¿cómo evolucionaría en la siguiente temporada?

# Skill attributes used by the prediction. They are referenced by name so the
# selection no longer depends on the position of the columns in fdata_21.
habilidades <- c(
  "pace", "shooting", "passing", "dribbling", "defending", "physic"
)

# Data frame with each player's name, potential and skills
data21_habil <- fdata_21 %>%
  select(short_name, potential, all_of(habilidades))

# Cleaning: rows containing NA are dropped because those players have no
# information for these statistics
fdata21_habilidades <- data21_habil %>%
  drop_na()

# Data frame that will hold the predicted skills (name + six skills)
fdata21_habild_predic <- fdata21_habilidades %>%
  select(short_name, all_of(habilidades))

# Adjustment applied to all six skills according to the player's potential:
# >= 90 adds 1, >= 70 adds 2, >= 60 subtracts 1, anything lower is unchanged.
# Computed for all players at once instead of looping over rows, which is
# faster and also safe when the table is empty.
ajuste_potencial <- case_when(
  fdata21_habilidades$potential >= 90 ~ 1,
  fdata21_habilidades$potential >= 70 ~ 2,
  fdata21_habilidades$potential >= 60 ~ -1,
  TRUE ~ 0
)
fdata21_habild_predic <- fdata21_habild_predic %>%
  mutate(across(all_of(habilidades), ~ .x + ajuste_potencial))

# Weights: relative importance of each skill in the combined score
pesos <- c(pace = 0.2, shooting = 0.2, passing = 0.2, dribbling = 0.2, defending = 0.1, physic = 0.1)

# Weighted skill score of the current-skills table "fdata21_habilidades".
# A single matrix product replaces the per-row sapply(); the weights are
# indexed by name so each one is paired with its own column.
fdata21_habilidades$overall_habilidad <- as.numeric(
  as.matrix(fdata21_habilidades[habilidades]) %*% pesos[habilidades]
)

# Weighted skill score of the prediction table "fdata21_habild_predic"
fdata21_habild_predic$overall_habilidades <- as.numeric(
  as.matrix(fdata21_habild_predic[habilidades]) %*% pesos[habilidades]
)

# Preview the first rows of the current-skills table
fdata21_habilidades %>%
  head() %>%
  print()
## # A tibble: 6 × 9
##   short_name        potential  pace shooting passing dribbling defending physic
##   <chr>                 <dbl> <dbl>    <dbl>   <dbl>     <dbl>     <dbl>  <dbl>
## 1 L. Messi                 93    85       92      91        95        38     65
## 2 Cristiano Ronaldo        92    89       93      81        89        35     77
## 3 R. Lewandowski           91    78       91      78        85        43     82
## 4 Neymar Jr                91    91       85      86        94        36     59
## 5 K. De Bruyne             91    76       86      93        88        64     78
## 6 V. van Dijk              91    76       60      71        71        91     86
## # ℹ 1 more variable: overall_habilidad <dbl>
# Preview the first rows of the prediction table
fdata21_habild_predic %>%
  head() %>%
  print()
## # A tibble: 6 × 8
##   short_name         pace shooting passing dribbling defending physic
##   <chr>             <dbl>    <dbl>   <dbl>     <dbl>     <dbl>  <dbl>
## 1 L. Messi             86       93      92        96        39     66
## 2 Cristiano Ronaldo    90       94      82        90        36     78
## 3 R. Lewandowski       79       92      79        86        44     83
## 4 Neymar Jr            92       86      87        95        37     60
## 5 K. De Bruyne         77       87      94        89        65     79
## 6 V. van Dijk          77       61      72        72        92     87
## # ℹ 1 more variable: overall_habilidades <dbl>

Comparativa del desarrollo de la temporada de los jugadores

# Radar axes: the six skill columns, in plotting order
ejes_radar <- c("pace", "shooting", "passing", "dribbling", "defending", "physic")

# Predicted skills for L. Messi, taken as the first row of the prediction table
messi_predic <- fdata21_habild_predic %>%
  select(all_of(ejes_radar)) %>%
  slice(1)

# 2022 skills for L. Messi, taken as the first row of the 2022 table
messi_22 <- fdata_22 %>%
  select(all_of(ejes_radar)) %>%
  slice(1)

# One row per series: prediction first, 2022 data second
vect_messi <- rbind(messi_predic, messi_22)

# Prepend the axis maximum (100) and minimum (0) rows ahead of the series
max_min <- rbind(rep(100, 6), rep(0, 6), vect_messi)
colnames(max_min) <- ejes_radar

# Line and fill colours, one per series
colores_linea <- c(rgb(0.2, 0.5, 0.5, 0.9), rgb(0.8, 0.2, 0.2, 0.9))
colores_relleno <- c(rgb(0.2, 0.5, 0.5, 0.5), rgb(0.8, 0.2, 0.2, 0.5))

radarchart(
  max_min[1:4, ],
  axistype = 1,
  pcol = colores_linea,
  pfcol = colores_relleno,
  plwd = 4,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  caxislabels = seq(0, 100, 20),
  cglwd = 0.8,
  vlcex = 0.8,
  title = "Comparativa de habilidades de L.Messi en 2021 y 2022"
)

Jugador Lucas Moura

# Radar axes: the six skill columns, in plotting order
ejes_radar <- c("pace", "shooting", "passing", "dribbling", "defending", "physic")

# Predicted skills for Lucas Moura
moura_predic <- fdata21_habild_predic %>%
  filter(short_name == "Lucas Moura") %>%
  select(all_of(ejes_radar))

# 2022 skills for Lucas Moura
moura_2022 <- fdata_22 %>%
  filter(short_name == "Lucas Moura") %>%
  select(all_of(ejes_radar))

# Radar chart data: prediction first, 2022 data second, preceded by the axis
# maximum (100) and minimum (0) rows
vect_moura <- rbind(moura_predic, moura_2022)

max_min <- rbind(rep(100, 6), rep(0, 6), vect_moura)
colnames(max_min) <- ejes_radar

# Line and fill colours, one per series
colores_linea <- c(rgb(0.2, 0.5, 0.5, 0.9), rgb(0.8, 0.2, 0.2, 0.9))
colores_relleno <- c(rgb(0.2, 0.5, 0.5, 0.5), rgb(0.8, 0.2, 0.2, 0.5))

radarchart(
  max_min[1:4, ],
  axistype = 1,
  pcol = colores_linea,
  pfcol = colores_relleno,
  plwd = 4,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  caxislabels = seq(0, 100, 20),
  cglwd = 0.8,
  vlcex = 0.8,
  title = "Comparativa de habilidades de Lucas Moura en 2021 y 2022"
)

Lucas Moura: desarrollo en los años 2015, 2021 y 2022

# 2015 skills for Lucas Moura, looked up by full name in the FIFA 15 data
moura_2015 <- origi_data15 %>%
  filter(long_name == "Lucas Rodrigues Moura da Silva") %>%
  select(all_of(c("pace", "shooting", "passing", "dribbling", "defending", "physic")))

# Series order: prediction, 2022 data, 2015 data
vect_moura_t <- rbind(moura_predic, moura_2022, moura_2015)

# Prepend the axis maximum (100) and minimum (0) rows ahead of the series
max_min <- rbind(rep(100, 6), rep(0, 6), vect_moura_t)
colnames(max_min) <- colnames(vect_moura_t)

# Line and fill colours, one per series
colores_linea <- c(
  rgb(0.2, 0.5, 0.5, 0.9), rgb(0.8, 0.2, 0.2, 0.9), rgb(0.2, 0.7, 0.3, 0.9)
)
colores_relleno <- c(
  rgb(0.2, 0.5, 0.5, 0.5), rgb(0.8, 0.2, 0.2, 0.5), rgb(0.2, 0.7, 0.3, 0.5)
)

radarchart(
  max_min[1:5, ],
  axistype = 1,
  pcol = colores_linea,
  pfcol = colores_relleno,
  plwd = 4, cglcol = "grey",
  cglty = 1, axislabcol = "grey",
  caxislabels = seq(0, 100, 20),
  cglwd = 0.8, vlcex = 0.8,
  title = "Comparativa de habilidades de Lucas Moura en 2015-2022"
)

Jugador Sadio Mané

# Radar axes: the six skill columns, in plotting order
ejes_radar <- c("pace", "shooting", "passing", "dribbling", "defending", "physic")

# Predicted skills for Sadio Mané
mane_predic <- fdata21_habild_predic %>%
  filter(short_name == "S. Mané") %>%
  select(all_of(ejes_radar))

# 2022 skills for Sadio Mané
mane_2022 <- fdata_22 %>%
  filter(short_name == "S. Mané") %>%
  select(all_of(ejes_radar))

# Radar chart data: prediction first, 2022 data second, preceded by the axis
# maximum (100) and minimum (0) rows
vect_mane <- rbind(mane_predic, mane_2022)

max_min <- rbind(rep(100, 6), rep(0, 6), vect_mane)
colnames(max_min) <- ejes_radar

# Line and fill colours, one per series
colores_linea <- c(rgb(0.2, 0.5, 0.5, 0.9), rgb(0.8, 0.2, 0.2, 0.9))
colores_relleno <- c(rgb(0.2, 0.5, 0.5, 0.5), rgb(0.8, 0.2, 0.2, 0.5))

radarchart(
  max_min[1:4, ],
  axistype = 1,
  pcol = colores_linea,
  pfcol = colores_relleno,
  plwd = 4,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  caxislabels = seq(0, 100, 20),
  cglwd = 0.8,
  vlcex = 0.8,
  title = "Comparativa de habilidades de S. Mané en 2021 y 2022"
)

Sadio Mané: desarrollo en los años 2015, 2021 y 2022

# 2015 skills for Sadio Mané, looked up by short name in the FIFA 15 data
mane_2015 <- origi_data15 %>%
  filter(short_name == "S. Mané") %>%
  select(all_of(c("pace", "shooting", "passing", "dribbling", "defending", "physic")))

# Series order: prediction, 2022 data, 2015 data
vect_mane_t <- rbind(mane_predic, mane_2022, mane_2015)

# Prepend the axis maximum (100) and minimum (0) rows ahead of the series
max_min <- rbind(rep(100, 6), rep(0, 6), vect_mane_t)
colnames(max_min) <- colnames(vect_mane_t)

# Line and fill colours, one per series
colores_linea <- c(
  rgb(0.2, 0.5, 0.5, 0.9), rgb(0.8, 0.2, 0.2, 0.9), rgb(0.2, 0.7, 0.3, 0.9)
)
colores_relleno <- c(
  rgb(0.2, 0.5, 0.5, 0.5), rgb(0.8, 0.2, 0.2, 0.5), rgb(0.2, 0.7, 0.3, 0.5)
)

radarchart(
  max_min[1:5, ],
  axistype = 1,
  pcol = colores_linea,
  pfcol = colores_relleno,
  plwd = 4, cglcol = "grey",
  cglty = 1, axislabcol = "grey",
  caxislabels = seq(0, 100, 20),
  cglwd = 0.8, vlcex = 0.8,
  title = "Comparativa de habilidades de S. Mané en 2015-2022"
)

Jugador Virgil van Dijk

# Radar axes: the six skill columns, in plotting order
ejes_radar <- c("pace", "shooting", "passing", "dribbling", "defending", "physic")

# Predicted skills for Virgil van Dijk
vanD_predic <- fdata21_habild_predic %>%
  filter(short_name == "V. van Dijk") %>%
  select(all_of(ejes_radar))

# 2022 skills for Virgil van Dijk
vanD_2022 <- fdata_22 %>%
  filter(short_name == "V. van Dijk") %>%
  select(all_of(ejes_radar))

# Radar chart data: prediction first, 2022 data second, preceded by the axis
# maximum (100) and minimum (0) rows
vect_vanD <- rbind(vanD_predic, vanD_2022)

max_min <- rbind(rep(100, 6), rep(0, 6), vect_vanD)
colnames(max_min) <- ejes_radar

# Line and fill colours, one per series
colores_linea <- c(rgb(0.2, 0.5, 0.5, 0.9), rgb(0.8, 0.2, 0.2, 0.9))
colores_relleno <- c(rgb(0.2, 0.5, 0.5, 0.5), rgb(0.8, 0.2, 0.2, 0.5))

radarchart(
  max_min[1:4, ],
  axistype = 1,
  pcol = colores_linea,
  pfcol = colores_relleno,
  plwd = 4,
  cglcol = "grey",
  cglty = 1,
  axislabcol = "grey",
  caxislabels = seq(0, 100, 20),
  cglwd = 0.8,
  vlcex = 0.8,
  title = "Comparativa de habilidades de Virgil van Dijk en 2021 y 2022"
)

Virgil van Dijk: desarrollo en los años 2015, 2021 y 2022

# 2015 skills for Virgil van Dijk, looked up by short name in the FIFA 15 data
vanD_2015 <- origi_data15 %>%
  filter(short_name == "V. van Dijk") %>%
  select(all_of(c("pace", "shooting", "passing", "dribbling", "defending", "physic")))

# Series order: prediction, 2022 data, 2015 data
vect_vanD_t <- rbind(vanD_predic, vanD_2022, vanD_2015)

# Prepend the axis maximum (100) and minimum (0) rows ahead of the series
max_min <- rbind(rep(100, 6), rep(0, 6), vect_vanD_t)
colnames(max_min) <- colnames(vect_vanD_t)

# Line and fill colours, one per series
colores_linea <- c(
  rgb(0.2, 0.5, 0.5, 0.9), rgb(0.8, 0.2, 0.2, 0.9), rgb(0.2, 0.7, 0.3, 0.9)
)
colores_relleno <- c(
  rgb(0.2, 0.5, 0.5, 0.5), rgb(0.8, 0.2, 0.2, 0.5), rgb(0.2, 0.7, 0.3, 0.5)
)

radarchart(
  max_min[1:5, ],
  axistype = 1,
  pcol = colores_linea,
  pfcol = colores_relleno,
  plwd = 4, cglcol = "grey",
  cglty = 1, axislabcol = "grey",
  caxislabels = seq(0, 100, 20),
  cglwd = 0.8, vlcex = 0.8,
  title = "Comparativa de habilidades de Virgil van Dijk en 2015-2022"
)

Otros gráficos

# Count players per preferred foot and add each group's share as a percentage
# rounded to one decimal.
contar_pie_habil <- function(datos) {
  datos %>%
    group_by(preferred_foot) %>%
    summarise(count = n()) %>%
    mutate(percentage = round(100 * count / sum(count), 1))
}

# Pie chart (stacked bar in polar coordinates) of a preferred-foot count
# table, with the percentage printed in the middle of each slice.
graficar_pie_habil <- function(conteo, titulo) {
  ggplot(conteo, aes(x = "", y = count, fill = preferred_foot)) +
    geom_col(width = 1) +
    coord_polar(theta = "y") +
    ggtitle(titulo) +
    theme_void() +
    scale_fill_brewer(palette = "PuBuGn") +
    geom_text(
      aes(label = paste0(percentage, "%")),
      position = position_stack(vjust = 0.5),
      size = 5
    )
}

# Preferred foot of the 2022 players
pie_count22 <- contar_pie_habil(fdata_22)
plot_pie22 <- graficar_pie_habil(
  pie_count22,
  "Distribución de Jugadores por Pie hábil(2022)"
)

# Preferred foot of the 2015 players
pie_count15 <- contar_pie_habil(origi_data15)
plot_pie15 <- graficar_pie_habil(
  pie_count15,
  "Distribución de Jugadores por Pie hábil(2015)"
)

# Place both pies side by side (2015 on the left, 2022 on the right)
combined_plotff <- grid.arrange(plot_pie15, plot_pie22, ncol = 2)